/*
      file: basehtmlparser.h
      desc: simple HTML parser that reports every tag, its attributes, and the text between tags
      author: chen hua
      create: 2005-03-16
*/
#ifndef _BaseHtmlParser_H_
#define _BaseHtmlParser_H_
#include <vector>
#include <string>
using namespace std;


// Base class of a simple, callback-driven HTML parser.
//
// A page is handed to one of the Parse() overloads; the On*() virtual
// functions are the events a derived class overrides to receive tags,
// text and comments. Every event has an empty default body here, so a
// subclass only needs to override the ones it cares about.
class CBaseHtmlParser
{
public:
    //! A string described by a pointer and a size. The struct declares no
    //! destructor, so it never releases the memory it points at.
    struct SZ_STRING
    {
        const char *pbData;  // start of the character data
        size_t cbData;       // size of the data
    };

public:
    CBaseHtmlParser();
    virtual ~CBaseHtmlParser();

    // Set-up hook; the base implementation is empty.
    virtual void Initialize()
    {
    }

    // Tear-down hook; the base implementation is empty.
    virtual void Destroy()
    {
    }

    // Parse an HTML page given as std::string values: the page URL and
    // the page content.
    virtual bool Parse( const std::string &URL, const std::string &Content );

    // Parse an HTML page given as pointer/size pairs: the page URL and
    // the page content.
    virtual bool Parse( const SZ_STRING &strUrl, const SZ_STRING &strContent );

    // Utility: turn URL (modified in place) into an absolute URL based on
    // the current page.
    void Relativity2AbsoluteURL( std::string &URL );

    // Event: a tag opens. For '<a href=..' strTagName is 'a' and Attribs
    // contains 'href'.
    // NOTE(review): Attribs is taken by value, so it is copied on every
    // call; the signature is left alone because changing it would stop
    // existing overrides in derived classes from overriding.
    virtual void OnStartTag( const SZ_STRING &strTagName,
                             std::vector< std::pair<SZ_STRING, SZ_STRING> > Attribs )
    {
    }

    // Event: a tag closes. For '</a>' strTagName is 'a'.
    virtual void OnEndTag( const SZ_STRING &strTagName )
    {
    }

    // Event: text between tags. For '<>hello<>' strData is 'hello'.
    virtual void OnData( const SZ_STRING &strData )
    {
    }

    // Event: a script or a comment, such as '<!-- .../-->' or
    // '<script ..> </script>'.
    virtual void OnComment( const SZ_STRING &strComment )
    {
    }

private:
    // Fixed-size buffers; presumably the URL and the domain of the page
    // being parsed -- confirm against the implementation file.
    char m_szBaseURL[1024];
    char m_szBaseDomain[1024];
};

#endif

